library(qualtRics)
library(magrittr)
library(dplyr)
library(emmeans)
library(tidyr)
library(ggplot2)
library(estimatr)
library(stm)
library(expss)
library(labelled)
library(vader)
library(hrbrthemes)
library(stringr)
library(readr)

# Recode data ---------------------------------------------------------
# MTurk coding file; it is joined onto the analysis data later in this script.
batch <- read_csv("data/mturk_codes2.csv")

# Read one Qualtrics wave export. Keeps the first row seen for each
# participantId, stores the id as a character string, and then retains only
# rows whose Progress equals 100. De-duplication runs before the Progress
# filter, exactly in that order.
load_wave <- function(path) {
  qualtRics::read_survey(path) %>%
    filter(!duplicated(participantId)) %>%
    mutate(participantId = paste(participantId)) %>%
    filter(Progress == 100)
}

w1 <- load_wave("data/study2_w1.csv")
w2 <- load_wave("data/study2_w2.csv")

# Keep participants that appear in both waves; columns present in both
# exports receive the default .x / .y suffixes.
study2 <- inner_join(w1, w2, by = "participantId")

# Build one cleaned text string per respondent from every column whose name
# contains "acc": values are pasted together (space separated), everything
# except ASCII letters and spaces is removed, and the ends are trimmed.
#
# Missing cells are blanked BEFORE pasting. Pasting first turns NA into the
# literal string "NA", and deleting that substring afterwards also removes
# "NA" from inside genuine words (e.g. "NASA" -> "SA").
acc_text <- study2 %>%
  select(contains("acc")) %>%
  mutate(across(everything(), ~ coalesce(as.character(.x), "")))

# unname() so no column name can be mistaken for a paste() argument
text_combined <- do.call(paste, unname(as.list(acc_text)))
text_combined <- gsub("[^A-Za-z ]", "", text_combined)
text_combined <- stringr::str_trim(text_combined)

# rename prompt variables
# Build a lookup with one row per column of study2: `name` starts as the
# current column name and `label` as that column's variable label (from
# labelled::var_label). Columns that are not prompt items end up with NA in
# both fields; the remaining rows get a shortened label, which is then used
# as the new column name below.
prompts <- tibble(name = colnames(study2), # tibble of var names, labels
                  label = as.character(labelled::var_label((study2)))) %>%
  # A name is kept when it starts with "acc" and does not contain "DO_", or
  # when it contains one of Q69_/Q70_/Q72_/Q73_; every other name becomes NA.
  mutate(name = ifelse(!str_detect(name, "^acc") | str_detect(name, "DO_"),
                       ifelse(!(str_detect(name, "Q69_") |
                                  str_detect(name, "Q70_") |
                                  str_detect(name, "Q72_") |
                                  str_detect(name, "Q73_")),
                              NA, name),
                       name), # NA if irrelevant
         # drop the label wherever the name was dropped
         label = ifelse(is.na(name), NA, label),
         # Labels containing "_w": characters -7..-4 of the label, then a tag
         # chosen by question id (Q69 -> _4p, Q70 -> _4c, Q72 -> _2p2c_p,
         # Q73 -> _2p2c_c), then "_w".
         # All other labels: characters -5..-2 of the label followed by
         # characters 4..10 of the name when the name contains "2p2c",
         # otherwise characters 4..6 of the name.
         # NOTE(review): presumably the sliced label characters are the prompt
         # stem (e.g. pro1 / con1) and the sliced name characters the condition
         # tag -- verify against the labels in the Qualtrics export.
         label = ifelse(str_detect(label,"_w"), # shorten labels, weak then strong
                        str_c(str_sub(label, -7, -4),
                              ifelse(str_detect(name, "Q69"), "_4p", ""),
                              ifelse(str_detect(name, "Q70"), "_4c", ""),
                              ifelse(str_detect(name, "Q72"), "_2p2c_p", ""),
                              ifelse(str_detect(name, "Q73"), "_2p2c_c", ""),
                              "_w"),
                        ifelse(str_detect(name, "2p2c"),
                               str_c(str_sub(label, -5, -2), str_sub(name, 4, 10)),
                               str_c(str_sub(label, -5, -2), str_sub(name, 4, 6)))))

# For each column of study2, i1 holds the row of `prompts` whose name matches
# it, or 0 when there is no match. Zero indices are ignored by `[`, so only
# the matched columns are renamed.
i1 <- match(colnames(study2), prompts$name, nomatch = 0) # find prompt variables
colnames(study2)[i1] <- prompts$label[i1] # replace var names with short labels

# Select, element by element, the response column that belongs to the
# respondent's information condition: `pro` where info is "4p", `con` where
# it is "4c", `mixed` where it is "2p2c". The result is a character vector;
# any other (or missing) condition value yields NA.
response <- function(info, pro, con, mixed){
  case_when(
    info == "4p" ~ as.character(pro),
    info == "4c" ~ as.character(con),
    info == "2p2c" ~ as.character(mixed),
    TRUE ~ NA_character_
  )
}

# Frame used for the MTurk validation task. For each of the 16 prompt columns
# (pro1-4 and con1-4, followed by their "_w" counterparts) it keeps the
# prompt column and adds "<prompt>_r", the response picked by response()
# according to `con` (plain columns) or `con_weak` ("_w" columns).
mturk_prompts <- c(paste0("pro", 1:4), paste0("con", 1:4))
mturk_prompts <- c(mturk_prompts, paste0(mturk_prompts, "_w"))
# interleave prompt and response names: pro1, pro1_r, pro2, pro2_r, ...
mturk_cols <- c("id",
                as.vector(rbind(mturk_prompts, paste0(mturk_prompts, "_r"))))

study2_mturk <- study2 %>%
  mutate(
    id = participantId,
    pro1_r = response(con, pro1_4p, pro1_4c, pro1_2p2c_p),
    pro2_r = response(con, pro2_4p, pro2_4c, pro2_2p2c_p),
    pro3_r = response(con, pro3_4p, pro3_4c, pro3_2p2c_p),
    pro4_r = response(con, pro4_4p, pro4_4c, pro4_2p2c_p),
    con1_r = response(con, con1_4p, con1_4c, con1_2p2c_c),
    con2_r = response(con, con2_4p, con2_4c, con2_2p2c_c),
    con3_r = response(con, con3_4p, con3_4c, con3_2p2c_c),
    con4_r = response(con, con4_4p, con4_4c, con4_2p2c_c),
    pro1_w_r = response(con_weak, pro1_4p_w, pro1_4c_w, pro1_2p2c_p_w),
    pro2_w_r = response(con_weak, pro2_4p_w, pro2_4c_w, pro2_2p2c_p_w),
    pro3_w_r = response(con_weak, pro3_4p_w, pro3_4c_w, pro3_2p2c_p_w),
    pro4_w_r = response(con_weak, pro4_4p_w, pro4_4c_w, pro4_2p2c_p_w),
    con1_w_r = response(con_weak, con1_4p_w, con1_4c_w, con1_2p2c_c_w),
    con2_w_r = response(con_weak, con2_4p_w, con2_4c_w, con2_2p2c_c_w),
    con3_w_r = response(con_weak, con3_4p_w, con3_4c_w, con3_2p2c_c_w),
    con4_w_r = response(con_weak, con4_4p_w, con4_4c_w, con4_2p2c_c_w)
  ) %>%
  select(all_of(mturk_cols))

# create spreadsheet for MTurk validation

# Format one prompt/response pair as an HTML snippet; NA wherever the
# response is missing.
task_html <- function(prompt, reply) {
  ifelse(!is.na(reply),
         str_c("<b>Prompt</b>: ", prompt,
               "<br><b>Response</b>: ", reply), NA)
}

# One row per respondent and issue strength ("strong" / "weak"): `html` is
# that respondent's non-missing snippets joined by '<br><br>', `n` is how many
# snippets were joined. Names containing "w" are the "_w" (weak) tasks.
# summarize() is called with its default grouping behaviour, so the result
# is presumably still grouped by `id` -- callers should ungroup if needed.
mturk <- study2_mturk %>%
  transmute(id,
            pro1_task = task_html(pro1, pro1_r),
            pro2_task = task_html(pro2, pro2_r),
            pro3_task = task_html(pro3, pro3_r),
            pro4_task = task_html(pro4, pro4_r),
            con1_task = task_html(con1, con1_r),
            con2_task = task_html(con2, con2_r),
            con3_task = task_html(con3, con3_r),
            con4_task = task_html(con4, con4_r),
            pro1_w_task = task_html(pro1_w, pro1_w_r),
            pro2_w_task = task_html(pro2_w, pro2_w_r),
            pro3_w_task = task_html(pro3_w, pro3_w_r),
            pro4_w_task = task_html(pro4_w, pro4_w_r),
            con1_w_task = task_html(con1_w, con1_w_r),
            con2_w_task = task_html(con2_w, con2_w_r),
            con3_w_task = task_html(con3_w, con3_w_r),
            con4_w_task = task_html(con4_w, con4_w_r)) %>%
  pivot_longer(cols = pro1_task:con4_w_task) %>%
  na.omit() %>%
  mutate(con = ifelse(str_detect(name, "w"), "weak", "strong")) %>%
  group_by(id, con) %>%
  summarize(html = paste(value, collapse = '<br><br>'),
            n = n())

# Strong-issue analysis frame: one row per row of study2, with the
# strong-issue survey items renamed to the analysis names that the weak-issue
# frame also uses, so the two frames can be stacked.
#  - id is a running row number; seq_len() keeps it valid for an empty frame.
#  - information recodes `con` ("4p"/"2p2c"/"4c") to "pro"/"mixed"/"con".
#  - motivation is a factor with "accuracy" as the reference level.
#  - polsoph1-5 are 0/1 indicators: 1 when the answer equals the keyed value
#    (items 1-2) or matches the keyed pattern, case-insensitively (items 3-5).
#  - timer is the log of the page-submit time.
#  - generic_flag is 1 when the raw generic_flag is missing, 0 otherwise.
#  - *_tertiles split the pre-treatment measures into three ntile() groups.
strong <- study2 %>%
  transmute(
    id = seq_len(nrow(study2)),
    participantId = participantId,
    likert = likert.x,
    information = plyr::mapvalues(con,
                                  c("4p", "2p2c", "4c"),
                                  c("pro", "mixed", "con")),
    motivation = relevel(as.factor(condition_strong), "accuracy"),
    pre_attitudes = pre_measure_strong_1,
    pre_certainty = pre_certainty_strong_19,
    pre_duration = Q52,
    pre_durability = Q53,
    pre_certain1 = Q51_1,
    pre_certain2 = Q51_2,
    pre_certain3 = Q51_3,
    pre_certain4 = Q51_4,
    pre_certain5 = Q51_5,
    pre_certain6 = Q51_6,
    pre_certain7 = Q51_7,
    topic = likert_topic,
    accuracy_con1 = rate_acc_stro_con1,
    accuracy_con2 = rate_acc_stro_con2,
    accuracy_con3 = rate_acc_stro_con3,
    accuracy_pro1 = rate_acc_stro_pro1,
    accuracy_pro2 = rate_acc_stro_pro2,
    accuracy_pro3 = rate_acc_stro_pro3,
    strength_con1 = rate_strength_stro_con1,
    strength_con2 = rate_strength_stro_con2,
    strength_con3 = rate_strength_stro_con3,
    strength_pro1 = rate_strength_stro_pro1,
    strength_pro2 = rate_strength_stro_pro2,
    strength_pro3 = rate_strength_stro_pro3,
    post_attitudes = post_outcome_1,
    post_certainty = post_certainty_19,
    post_certain1 = post_certain_multi_S_1,
    post_certain2 = post_certain_multi_S_2,
    post_certain3 = post_certain_multi_S_3,
    post_certain4 = post_certain_multi_S_4,
    post_certain5 = post_certain_multi_S_5,
    post_certain6 = post_certain_multi_S_6,
    post_certain7 = post_certain_multi_S_7,
    polsoph1 = as.numeric(polsoph1 == 2),
    polsoph2 = as.numeric(polsoph2 == 1),
    pid3, birthyr,
    ideology,
    polsoph3 = as.numeric(grepl("2/3|thirds|67", polsoph3,
                                ignore.case = TRUE)),
    polsoph4 = as.numeric(grepl("court|judicial|scotus", polsoph4,
                                ignore.case = TRUE)),
    polsoph5 = as.numeric(grepl("vice|vp", polsoph5, ignore.case = TRUE)),
    timer = log(`time_thought_str_Page Submit`),
    generic_flag = as.numeric(is.na(generic_flag)),
    pre_attitudes_tertiles = factor(ntile(pre_attitudes, 3), 1:3,
                                    c("Tertile 1", "Tertile 2", "Tertile 3")),
    pre_certainty_tertiles = factor(ntile(pre_certainty, 3), 1:3,
                                    c("Tertile 1", "Tertile 2", "Tertile 3")),
    text = text_combined,
    condition = "strong"
  )

# Weak-issue analysis frame: one row per row of study2, with the weak-issue
# survey items renamed to the same analysis names used by the strong-issue
# frame, so the two frames can be stacked.
#  - id is a running row number; seq_len() keeps it valid for an empty frame.
#  - information recodes `con_weak` ("4p"/"2p2c"/"4c") to "pro"/"mixed"/"con".
#  - motivation is a factor with "accuracy" as the reference level.
#  - polsoph1-5 are 0/1 indicators: 1 when the answer equals the keyed value
#    (items 1-2) or matches the keyed pattern, case-insensitively (items 3-5).
#  - timer is the log of the page-submit time.
#  - generic_flag is 1 when the raw generic_flag_w is missing, 0 otherwise.
#  - *_tertiles split the pre-treatment measures into three ntile() groups.
weak <- study2 %>%
  transmute(
    id = seq_len(nrow(study2)),
    participantId = participantId,
    likert = likert_weak.x,
    information = plyr::mapvalues(con_weak,
                                  c("4p", "2p2c", "4c"),
                                  c("pro", "mixed", "con")),
    motivation = relevel(as.factor(condition_weak), "accuracy"),
    pre_attitudes = pre_attitude_weak_1,
    pre_certainty = pre_certainty_weak_19,
    pre_duration = duration_weak,
    pre_durability = durability_weak,
    pre_certain1 = certainty_multi_weak_1,
    pre_certain2 = certainty_multi_weak_2,
    pre_certain3 = certainty_multi_weak_3,
    pre_certain4 = certainty_multi_weak_4,
    pre_certain5 = certainty_multi_weak_5,
    pre_certain6 = certainty_multi_weak_6,
    pre_certain7 = certainty_multi_weak_7,
    post_attitudes = post_attitude_weak_1,
    post_certainty = post_certainty_weak_19,
    post_certain1 = post_certain_multi_w_1,
    post_certain2 = post_certain_multi_w_2,
    post_certain3 = post_certain_multi_w_3,
    post_certain4 = post_certain_multi_w_4,
    post_certain5 = post_certain_multi_w_5,
    post_certain6 = post_certain_multi_w_6,
    post_certain7 = post_certain_multi_w_7,
    topic = likert_weak_topic,
    accuracy_con1 = rate_acc_weak_con1_w,
    accuracy_con2 = rate_acc_weak_con2_w,
    accuracy_con3 = rate_acc_weak_con3_w,
    accuracy_pro1 = rate_acc_weak_pro1_w,
    accuracy_pro2 = rate_acc_weak_pro2_w,
    accuracy_pro3 = rate_acc_weak_pro3_w,
    strength_con1 = rate_strength_weak_con1_w,
    strength_con2 = rate_strength_weak_con2_w,
    strength_con3 = rate_strength_weak_con3_w,
    strength_pro1 = rate_strength_weak_pro1_w,
    strength_pro2 = rate_strength_weak_pro2_w,
    strength_pro3 = rate_strength_weak_pro3_w,

    polsoph1 = as.numeric(polsoph1 == 2),
    polsoph2 = as.numeric(polsoph2 == 1),
    pid3, birthyr,
    ideology,
    polsoph3 = as.numeric(grepl("2/3|thirds|67", polsoph3,
                                ignore.case = TRUE)),
    polsoph4 = as.numeric(grepl("court|judicial|scotus", polsoph4,
                                ignore.case = TRUE)),
    polsoph5 = as.numeric(grepl("vice|vp", polsoph5, ignore.case = TRUE)),
    timer = log(`time_thought_weak_Page Submit`),
    generic_flag = as.numeric(is.na(generic_flag_w)),
    pre_attitudes_tertiles = factor(ntile(pre_attitudes, 3), 1:3,
                                    c("Tertile 1", "Tertile 2", "Tertile 3")),
    pre_certainty_tertiles = factor(ntile(pre_certainty, 3), 1:3,
                                    c("Tertile 1", "Tertile 2", "Tertile 3")),
    text = text_combined,
    condition = "weak"
  )

# Stack the strong- and weak-issue frames and keep only rows whose
# generic_flag equals 1.
combined_df <- bind_rows(strong, weak) %>%
  filter(generic_flag == 1)

# Multi-item certainty scales: psych::alpha() is run on the seven
# pre_certain1-7 / post_certain1-7 items (the single-item *_certainty
# measures and the tertile factor are excluded) and its per-row `scores`
# become the scale values.
# The statement order below is kept deliberately: it fixes both the column
# order of combined_df and which columns the contains() selections can match.
pre_certain_reliability <- combined_df %>%
  select(contains("pre_certain")) %>%
  select(-pre_certainty, -pre_certainty_tertiles) %>%
  psych::alpha()

combined_df$pre_certain_scores <- pre_certain_reliability$scores

post_certain_reliability <- combined_df %>%
  select(contains("post_certain")) %>%
  select(-post_certainty) %>%
  psych::alpha()

# Simple three-item means of the argument ratings, separately for con and
# pro arguments; any missing item makes the mean NA.
combined_df$post_con_strength <- with(
  combined_df,
  (strength_con1 + strength_con2 + strength_con3) / 3
)

combined_df$post_pro_strength <- with(
  combined_df,
  (strength_pro1 + strength_pro2 + strength_pro3) / 3
)

combined_df$post_con_acc <- with(
  combined_df,
  (accuracy_con1 + accuracy_con2 + accuracy_con3) / 3
)

combined_df$post_pro_acc <- with(
  combined_df,
  (accuracy_pro1 + accuracy_pro2 + accuracy_pro3) / 3
)

combined_df$post_certain_scores <- post_certain_reliability$scores

# create accuracy scale (all six accuracy items; check.keys lets
# psych::alpha() reverse-key items where it finds that necessary)
accuracy_scale_reliability <- combined_df %>%
  select(contains(c("accuracy_con", "accuracy_pro"))) %>%
  psych::alpha(check.keys = TRUE)

combined_df$post_accuracy <- accuracy_scale_reliability$scores

# create strength scale (all six strength items, same treatment)
strength_scale_reliability <- combined_df %>%
  select(contains(c("strength_con", "strength_pro"))) %>%
  psych::alpha(check.keys = TRUE)

combined_df$post_strength <- strength_scale_reliability$scores

# create pk scale: share of the five polsoph indicators that equal 1
combined_df$pk_scale <- rowSums(select(combined_df, contains("polsoph"))) / 5

combined_df <- combined_df %>%
  mutate(pk_scale_tertiles = factor(ntile(pk_scale, 3), 1:3,
                                    c("Tertile 1", "Tertile 2", "Tertile 3")))


# recode information and clean up factor levels
# `information` becomes a factor with "pro" as the reference level before its
# levels are relabelled; `Issue Strength` is a relabelled copy of `condition`;
# the `motivation` levels are relabelled in place.
combined_df <- combined_df %>%
  mutate(
    information = relevel(as.factor(information), "pro"),
    information = plyr::mapvalues(information,
                                  c("con", "mixed", "pro"),
                                  c("Con", "Mixed", "Pro")),
    `Issue Strength` = plyr::mapvalues(condition,
                                       c("strong", "weak"),
                                       c("Strong", "Weak")),
    motivation = plyr::mapvalues(motivation,
                                 c("accuracy", "directional"),
                                 c("Accuracy Prime", "Directional Prime"))
  )


# Attach the MTurk coding results to the analysis data.

# Answer labels from the coding task, in order of the count (0-4) each one
# is mapped to.
intent_labels <- c("All responses agree with the prompt",
                   "1 response rejects/disagrees with the prompt",
                   "2 responses reject/disagree with the prompt",
                   "3 responses reject/disagree with the prompt",
                   "4 responses reject/disagree with the prompt")

# Mean coded count per participant and condition (NAs ignored), with the id
# converted to character to match participantId.
batch_scores <- batch %>%
  transmute(study.id = Input.id,
            condition = Input.con,
            denigrating_comments = as.numeric(
              plyr::mapvalues(Answer.intent.label,
                              intent_labels,
                              c(0,1,2,3,4)))) %>%
  group_by(study.id, condition) %>%
  summarize(denigrating_comments = mean(denigrating_comments, na.rm = TRUE)) %>%
  mutate(study.id = paste(study.id))

# Number of responses each participant contributed per condition.
# NOTE(review): if `mturk` is still grouped by `id` at this point,
# transmute() keeps `id` as well, and the join below will then suffix the
# clashing `id` columns (.x / .y) -- confirm this is intended.
mturk_counts <- mturk %>%
  transmute(study.id = id, condition = con, n)

# share_den = mean coded count divided by the number of responses
combined_df <- combined_df %>%
  left_join(batch_scores,
            by = c("participantId" = "study.id", "condition")) %>%
  left_join(mturk_counts,
            by = c("participantId" = "study.id", "condition")) %>%
  mutate(share_den = denigrating_comments/n)

# Shared ggplot2 theme settings: light grey panel without grid lines,
# size-15 text for axes, legend and facet strips, no legend title, legend
# at the bottom, and a 0.25 cm margin on every side.
theme_params <- local({
  text_15 <- element_text(size = 15)
  theme(
    panel.background = element_rect(fill = "gray95"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    legend.text = text_15,
    axis.text.x = text_15,
    legend.title = element_blank(),
    axis.text.y = text_15,
    axis.title.y = text_15,
    axis.title.x = text_15,
    legend.position = "bottom",
    plot.margin = unit(rep(.25, 4), "cm"),
    strip.text.x = text_15
  )
})
